################################################################################
#### Title: Receivers 24 Data Cleaning
#### Portion of the Analysis: Data Cleaning for Study 2 Data
################################################################################

##########
# Set Up #
##########

# Start from an empty global environment so objects from earlier sessions
# cannot leak into this run
rm(list = ls())

# Point the session at the folder holding the raw survey export
# NOTE(review): this absolute path is machine-specific; rm(list = ls()) and
# setwd() make the script hard to run elsewhere -- consider a project-relative
# path in a future revision.
setwd(
  "/Users/martinnaunov/Desktop/Desktop - Martin’s MacBook Pro (2)/Persuasion/1_rawdata"
)

# Load packages
library(tidyverse)
library(scales)

# Read in data from the Connect 2024 study (path is relative to the working
# directory set above)
df <- read.csv(file = "receivers_2_raw.csv")

#############################
# Preliminary Data Cleaning #
#############################

# One pass over the raw export:
#   1. drop the first two rows,
#   2. re-guess the column types now that those rows are gone,
#   3. keep only rows with Status == 0 (this removes survey preview rows).
df <- df %>%
  slice(-(1:2)) %>%
  type_convert() %>%
  filter(Status == 0)

# Give the raw question columns descriptive names (matching the names used in
# Study 1). Each argument reads new_name = old_name.
df <- rename(
  df,
  # Pre-argument opinions
  pre_imm = Q2.1_1,
  pre_trans = Q3.1_1,
  pre_env = Q4.1_1,
  # Partisanship items
  partyid_5 = Q5.1,
  repub_strength = Q5.2,
  demo_strength = Q5.3,
  party_lean = Q5.4,
  # Attention check and consent
  att_check = Q6.1,
  consent = Q7.1_1,
  # Environment: post-argument opinion and argument ratings
  post_env_anchor = Q8.3_1,
  post_env_no_anchor = Q8.4_1,
  reasoning_env = Q8.6,
  clarity_env = Q8.7,
  persuasive_env = Q8.8,
  # Immigration: post-argument opinion and argument ratings
  post_imm_anchor = Q9.3_1,
  post_imm_no_anchor = Q9.4_1,
  reasoning_imm = Q9.6,
  clarity_imm = Q9.7,
  persuasive_imm = Q9.8,
  # Transgender rights: post-argument opinion and argument ratings
  post_trans_anchor = Q10.3_1,
  post_trans_no_anchor = Q10.4_1,
  reasoning_trans = Q10.6,
  clarity_trans = Q10.7,
  persuasive_trans = Q10.8,
  # Control-condition post opinions
  post_env_anchor_control = Q11.1_1,
  post_env_no_anchor_control = Q11.2_1,
  post_trans_anchor_control = Q12.1_1,
  post_trans_no_anchor_control = Q12.2_1,
  post_imm_anchor_control = Q13.1_1,
  post_imm_no_anchor_control = Q13.2_1,
  # Open-ended item
  OE_persuasion = Q14.1,
  # Demographics
  edu = Q15.1,
  female = Q15.2,
  age = Q15.3,
  income = Q15.4,
  state_residence = Q15.5,
  race1 = Q15.6_1,
  race2 = Q15.6_2,
  race3 = Q15.6_3,
  race4 = Q15.6_4,
  race5 = Q15.6_5,
  race6 = Q15.6_6,
  race7 = Q15.6_7,
  # Identifier columns, one per topic
  respid_env = resp_id_env,
  respid_trans = resp_id_trans,
  respid_imm = resp_id_imm
)

# Inspect the class of every column after type conversion and renaming
var_classes <- sapply(df, class)
print(var_classes)


################
# Partisanship #
################

# Create a seven-point partisanship variable with 1 as Strong Democrat and 7 as
# Strong Republican, built from the strength / lean follow-up items.
# Branches are evaluated top to bottom and the first match wins; respondents
# matching none of them receive NA.
# NOTE(review): the branches do not condition on partyid_5 -- presumably the
# survey's display logic shows each follow-up only to the relevant group;
# confirm against the questionnaire.

df <- df %>%
  mutate(
    partyid_7 = case_when(
      demo_strength == 1 ~ 1,  # Strong Democrat
      demo_strength == 2 ~ 2,  # Not very strong Democrat
      party_lean == 2 ~ 3,     # Lean Democrat
      party_lean == 3 ~ 4,     # Independent
      party_lean == 1 ~ 5,     # Lean Republican
      repub_strength == 2 ~ 6, # Not very strong Republican
      repub_strength == 1 ~ 7, # Strong Republican
      # Double NA so every branch has the same type; an integer NA mixed with
      # double values is an error under dplyr versions before 1.1.0.
      TRUE ~ NA_real_
    )
  )

# Check helper: counts rows matching a two-variable condition, used to verify
# that the values of partyid_7 match what they are intended to represent.
#
# Arguments:
#   df      - data frame to search
#   var1    - unquoted column name, matched with %in% (so an NA inside
#             values1 also matches missing values)
#   values1 - vector of accepted values for var1
#   var2    - unquoted column name, compared with ==
#   value2  - value var2 must equal (rows where var2 is NA are never counted)
#
# Returns the number of rows satisfying both conditions.
func_check <- function(df, var1, values1, var2, value2) {
  matching_rows <- filter(df, {{ var1 }} %in% values1, {{ var2 }} == value2)
  nrow(matching_rows)
}

# Row counts for each partisanship category; the trailing number on each line
# is the count recorded by the original author.

# Strong Democrats
func_check(df, var1 = partyid_5, values1 = 1,
           var2 = demo_strength, value2 = 1) # 671

# Not Very Strong Democrats
func_check(df, var1 = partyid_5, values1 = 1,
           var2 = demo_strength, value2 = 2) # 526

# Lean Democrat
func_check(df, var1 = partyid_5, values1 = c(3, 4, 5, NA),
           var2 = party_lean, value2 = 2) # 312

# Independent
func_check(df, var1 = partyid_5, values1 = c(3, 4, 5, NA),
           var2 = party_lean, value2 = 3) # 285

# Lean Republican
func_check(df, var1 = partyid_5, values1 = c(3, 4, 5, NA),
           var2 = party_lean, value2 = 1) # 173

# Not Very Strong Republican
func_check(df, var1 = partyid_5, values1 = 2,
           var2 = repub_strength, value2 = 2) # 313

# Strong Republican
func_check(df, var1 = partyid_5, values1 = 2,
           var2 = repub_strength, value2 = 1) # 252

# Distribution of the constructed seven-point variable, including missing
table(df[["partyid_7"]], useNA = "always")


#########################
# Post-Argument Opinion #
#########################  

# Unlike in Study 1, the post-argument opinions in Study 2 are split 
# across four variables per issue. 

# For the environment, participant answers are stored in post_env_anchor (they are reminded
# of their previous issue position and see an argument), post_env_no_anchor (they are not 
# reminded of their previous issue position and see an argument), post_env_anchor_control
# (they are reminded of their previous position and don't see an argument) and 
# post_env_no_anchor_control (they are not reminded of their previous issue position and 
# do not see an argument). 

# This same structure applies to the immigration issue (post_imm_anchor, post_imm_no_anchor, 
# post_imm_anchor_control, and post_imm_no_anchor_control) and the transgender issue 
# (post_trans_anchor, post_trans_no_anchor, post_trans_anchor_control, post_trans_no_anchor_control). 

# To account for this, we need to coalesce the four variables that comprise each topic 
# into one post-issue opinion variable per topic (post_env, post_imm, and post_trans). 


# Collapse the four condition-specific post-opinion columns of each topic into
# one column per topic. coalesce() takes, row by row, the first non-missing
# value in the order listed.
df <- mutate(
  df,
  post_env = coalesce(
    post_env_anchor,
    post_env_no_anchor,
    post_env_anchor_control,
    post_env_no_anchor_control
  ),
  post_imm = coalesce(
    post_imm_anchor,
    post_imm_no_anchor,
    post_imm_anchor_control,
    post_imm_no_anchor_control
  ),
  post_trans = coalesce(
    post_trans_anchor,
    post_trans_no_anchor,
    post_trans_anchor_control,
    post_trans_no_anchor_control
  )
)


##############
# Issue Side #
##############

# Make a variable indicating the respondent's issue side on the environment
# called issueside_env, where 0-4 (conservative = 3), 5 (neutral = 2), 6-10 (liberal = 1). 

# Make another variable called issueside_imm indicating the respondent's issue side on
# on the immigration where 0-4 (conservative = 3), 5 (neutral = 2), 6-10 (liberal = 1)

# Finally, make a variable called issueside_trans indicating the respondent's issue side on 
# transgender rights. Unlike, the previous two issues, a score of 0-4 indicates a liberal position
# a score of 5 indicates a neutral position, and a score of 6-10 indicates a conservative position. 
# To account for this reverse code the pre_trans and post_trans opinion variables and create a 
# new variable called issueside_trans where a score of 0-4 is now conservative (= 3), 5 is still 
# neutral (= 2) and a score ranging from 6-10 is liberal (= 1)


# Check the three pre-argument opinion variables (value counts incl. missing)
apply(df[c("pre_env", "pre_imm", "pre_trans")], 2,  function(x) table(x, useNA = "always"))

# Step 1: reverse code the transgender-rights opinions so that, as for the
#         other two issues, higher values are the liberal side.
#         On the 0-10 scale the reverse of x is 10 - x (0 <-> 10, 1 <-> 9, ...,
#         5 stays 5). This replaces an 11-entry recode() lookup that gave the
#         same result for the integers 0..10 but silently left any other value
#         un-reversed. Missing values stay missing.
# Step 2: classify each pre-argument opinion into an issue side:
#         1 = liberal (> 5), 2 = neutral (== 5), 3 = conservative (< 5),
#         NA when the pre-argument opinion is missing.
# Step 3: store the three issue-side variables as factors.
df <- df %>%
  mutate(
    across(c(pre_trans, post_trans), ~ 10 - as.numeric(.x))
  ) %>%
  mutate(
    issueside_env = case_when(
      pre_env > 5 ~ 1,
      pre_env == 5 ~ 2,
      pre_env < 5 ~ 3,
      # Double NA keeps all branches the same type (an integer NA mixed with
      # double values is an error under dplyr versions before 1.1.0).
      TRUE ~ NA_real_
    ),
    issueside_imm = case_when(
      pre_imm > 5 ~ 1,
      pre_imm == 5 ~ 2,
      pre_imm < 5 ~ 3,
      TRUE ~ NA_real_
    ),
    issueside_trans = case_when(
      pre_trans > 5 ~ 1,
      pre_trans == 5 ~ 2,
      pre_trans < 5 ~ 3,
      TRUE ~ NA_real_
    )
  ) %>%
  mutate(
    across(c(issueside_env, issueside_imm, issueside_trans), as.factor)
  )

# Cross-tabulate each pre-argument opinion against its issue-side category to
# confirm the categorization (missing values shown as their own row/column).

# Environment
table(df[["pre_env"]], df[["issueside_env"]], useNA = "always")

# Immigration
table(df[["pre_imm"]], df[["issueside_imm"]], useNA = "always")

# Transgender Rights
table(df[["pre_trans"]], df[["issueside_trans"]], useNA = "always")


#################
# Argument Side #
#################
# Classify the direction (ideologically) of the argument the person read.
# We systematically assigned each argument a unique API code (api_env, api_imm, api_trans)
# which we used for randomization in Qualtrics
# and which we use here to code the argument's side.

### Environment ###
# api 1:51 are conservative arguments (2) and 
# api 52:169 are liberal arguments (1)

### Immigration ## 
# api 170:215 are conservative arguments (2)
# api 216:299 are liberal arguments (1)

### Transgender Rights ### 
# api 300:352 are conservative arguments (2)
# api 353:400 are liberal arguments (1)

# Code the ideological side of the argument each respondent read from its API
# code: 1 = liberal, 2 = conservative, NA when the API code is missing.
# Within each topic a single cut-off separates the two sides; the conditions
# have no lower/upper bound, so codes outside the topic's range are not caught.
# The three variables are then stored as factors.
df <- df %>%
  mutate(
    argueside_env = case_when(
      api_env >= 52 ~ 1, # Liberal
      api_env <= 51 ~ 2, # Conservative
      # Double NA keeps all branches the same type (an integer NA mixed with
      # double values is an error under dplyr versions before 1.1.0).
      TRUE ~ NA_real_
    ),
    argueside_imm = case_when(
      api_imm >= 216 ~ 1, # Liberal
      api_imm <= 215 ~ 2, # Conservative
      TRUE ~ NA_real_
    ),
    argueside_trans = case_when(
      api_trans >= 353 ~ 1, # Liberal
      api_trans <= 352 ~ 2, # Conservative
      TRUE ~ NA_real_
    )
  ) %>%
  mutate(
    across(c(argueside_env, argueside_imm, argueside_trans), as.factor)
  )

# Cross-tabulate the side of the argument shown (rows: 1 = liberal,
# 2 = conservative) against the respondent's pre-argument issue side
# (columns: 1 = liberal, 2 = neutral, 3 = conservative) to check the
# assignment. Respondents with a neutral pre-argument position should have
# been randomized to see either a liberal (1) or a conservative (2) argument.

# Environment
table(df[["argueside_env"]], df[["issueside_env"]], useNA = "always")

# Immigration
table(df[["argueside_imm"]], df[["issueside_imm"]], useNA = "always")

# Transgender Rights
table(df[["argueside_trans"]], df[["issueside_trans"]], useNA = "always")



#####################
# Argument Movement #
#####################

#Creating variables (quarkelt 1 and quarkelt 2) defined by the change in Receiver issue opinion (post-argument exposure compared to pre-). 
# coded such that positive values represent change in the Sender’s intended direction
# and negative values represent “backfire” (the Receiver moved away from the Sender’s opinion on the issue).

# I.e. a positive change in time1 to time2 score always indicates movement in the direction 
# of the argument.

# For instance, a respondent with a liberal response (6-10), will see a conservative 
# argument. A lower number at time 2 would indicate movement in the direction of the 
# the argument but the T2 to T1 difference would be negative. Accordingly, Liberals would 
# receive a T1 score of 10 - pre_issueopinion and a T2 score of 10 - post_issueopinion. 
# In this way, if a respondent, for instance, answers an 8, they will receive a T1 score 
# of 2, and if they move in the conservative (intended) direction to say, a 6, they will receive a T2
# score of 4. Now a difference between T2 and T1 would be positive indicating movement in the direction 
# of the argument.If a liberal respondent moves from 8 to 9, however
# then the T2 - T1 difference would be negative
# (10-9) - (10-8)=1-2=-1 (indicating a backlash)


# Build direction-adjusted opinion scores for each topic:
#   quarkelt1_* = pre-argument score, quarkelt2_* = post-argument score,
# oriented so that quarkelt2 - quarkelt1 > 0 means movement in the direction
# of the argument and < 0 means backlash.
#   - pre < 5 (conservative side): scores are used as-is.
#   - pre > 5 (liberal side): scores are flipped (10 - score).
#   - pre == 5 (neutral): the pre score is 5 either way; the post score is kept
#     for a liberal argument and flipped for a conservative argument.
# Respondents with a missing pre score (or, for quarkelt2, a neutral respondent
# with a missing argument side) receive NA.
df <- df %>%
  mutate(
    quarkelt1_env = case_when(
      pre_env <= 5 ~ pre_env, # Conservatives and Neutrals
      pre_env > 5 ~ 10 - pre_env, # Liberals
      # Double NA keeps all branches the same type (an integer NA mixed with
      # double values is an error under dplyr versions before 1.1.0).
      TRUE ~ NA_real_
    ),
    quarkelt2_env = case_when(
      pre_env < 5 ~ post_env, # Conservatives
      pre_env > 5 ~ 10 - post_env, # Liberals
      pre_env == 5 & argueside_env == 1 ~ post_env, # Neutrals who saw a liberal argument
      pre_env == 5 & argueside_env == 2 ~ 10 - post_env, # Neutrals who saw a conservative argument
      TRUE ~ NA_real_
    ),
    quarkelt1_imm = case_when(
      pre_imm <= 5 ~ pre_imm, # Conservatives and Neutrals
      pre_imm > 5 ~ 10 - pre_imm, # Liberals
      TRUE ~ NA_real_
    ),
    quarkelt2_imm = case_when(
      pre_imm < 5 ~ post_imm, # Conservatives
      pre_imm > 5 ~ 10 - post_imm, # Liberals
      pre_imm == 5 & argueside_imm == 1 ~ post_imm, # Neutrals who saw a liberal argument
      pre_imm == 5 & argueside_imm == 2 ~ 10 - post_imm, # Neutrals who saw a conservative argument
      TRUE ~ NA_real_
    ),
    quarkelt1_trans = case_when(
      pre_trans <= 5 ~ pre_trans, # Conservatives and Neutrals (because these variables have been reverse coded)
      pre_trans > 5 ~ 10 - pre_trans, # Liberals
      TRUE ~ NA_real_
    ),
    quarkelt2_trans = case_when(
      pre_trans < 5 ~ post_trans, # Conservatives
      pre_trans > 5 ~ 10 - post_trans, # Liberals
      pre_trans == 5 & argueside_trans == 1 ~ post_trans, # Neutrals who saw a liberal argument
      pre_trans == 5 & argueside_trans == 2 ~ 10 - post_trans, # Neutrals who saw a conservative argument
      TRUE ~ NA_real_
    )
  )


###################
# Binary Movement #
################### 

# Make a binary variable for each topic:
#   1 = movement in the direction of the argument (quarkelt2 > quarkelt1),
#   0 = no movement or movement away from the argument, i.e. backlash
#       (quarkelt2 <= quarkelt1),
#   NA when either score is missing.
# The three variables are then stored as factors.

### Alternative Coding
df <- df %>%
  mutate(
    binary_env = case_when(
      quarkelt2_env > quarkelt1_env ~ 1,
      quarkelt2_env <= quarkelt1_env ~ 0,
      # Double NA keeps all branches the same type (an integer NA mixed with
      # double values is an error under dplyr versions before 1.1.0).
      TRUE ~ NA_real_
    ),
    binary_imm = case_when(
      quarkelt2_imm > quarkelt1_imm ~ 1,
      quarkelt2_imm <= quarkelt1_imm ~ 0,
      TRUE ~ NA_real_
    ),
    binary_trans = case_when(
      quarkelt2_trans > quarkelt1_trans ~ 1,
      # Compare post against pre: this branch previously compared
      # quarkelt2_trans with itself, which is true for every non-missing post
      # score regardless of the pre score.
      quarkelt2_trans <= quarkelt1_trans ~ 0,
      TRUE ~ NA_real_
    )
  ) %>%
  mutate(
    across(c(binary_env, binary_imm, binary_trans), as.factor)
  )


#######################################################
# Movement in Favor of the Argument, Backlash, Stasis #
#######################################################

# Make a three-category movement variable for each topic:
#    1 = movement in favor of the argument (quarkelt2 > quarkelt1),
#    0 = stasis (quarkelt2 == quarkelt1),
#   -1 = backlash (quarkelt2 < quarkelt1),
#   NA when either score is missing.
# The three variables are then stored as factors.
df <- df %>%
  mutate(
    movement_env = case_when(
      quarkelt2_env > quarkelt1_env ~ 1,
      quarkelt2_env == quarkelt1_env ~ 0,
      quarkelt2_env < quarkelt1_env ~ -1,
      # Double NA keeps all branches the same type (an integer NA mixed with
      # double values is an error under dplyr versions before 1.1.0).
      TRUE ~ NA_real_
    ),
    movement_imm = case_when(
      quarkelt2_imm > quarkelt1_imm ~ 1,
      quarkelt2_imm == quarkelt1_imm ~ 0,
      quarkelt2_imm < quarkelt1_imm ~ -1,
      TRUE ~ NA_real_
    ),
    movement_trans = case_when(
      quarkelt2_trans > quarkelt1_trans ~ 1,
      quarkelt2_trans == quarkelt1_trans ~ 0,
      quarkelt2_trans < quarkelt1_trans ~ -1,
      TRUE ~ NA_real_
    )
  ) %>%
  mutate(
    across(c(movement_env, movement_imm, movement_trans), as.factor)
  )



#################
# Respondent ID #
#################
# Give every row a sequential numeric identifier: 1, 2, ..., number of rows
df <- mutate(df, respondent = as.numeric(seq_len(n())))



############################### DEMOGRAPHICS ###################################

#############
# Education #
#############
### Education ###
# Collapse the raw education item into five levels (raw code -> new code):
#   1, 2    -> 1  Less than high school (did not attend high school; some
#                 high school, did not graduate)
#   3       -> 2  High school graduate
#   4, 5    -> 3  Some college (some college; Associate's degree)
#   6       -> 4  College (Bachelor's degree)
#   7, 8, 9 -> 5  Advanced degree (Master's, Doctoral, Professional)
#   anything else (including missing) -> NA
# NOTE(review): the labels for raw codes 6-9 are inferred from the mapping
# below; the earlier version of this comment listed Bachelor's as 5 and the
# advanced degrees as 6-8, which did not match the code. Confirm against the
# survey codebook.

# Check: keep a copy of the data as it was before recoding
df_check <- df

df <- df %>%
  mutate(
    edu = case_when(
      edu %in% c(1, 2) ~ 1,
      edu == 3 ~ 2,
      edu %in% c(4, 5) ~ 3,
      edu == 6 ~ 4,
      edu %in% c(7, 8, 9) ~ 5,
      # Double NA keeps all branches the same type (an integer NA mixed with
      # double values is an error under dplyr versions before 1.1.0).
      TRUE ~ NA_real_
    )
  )

##########
# Female #
##########

# Create two indicator variables from the raw `female` item:
#   female_main: female (2) -> 1; raw codes 1 and 3 -> 0; anything else -> NA.
#   female_alt:  female (2) -> 1; raw code 1 -> 0; "Neither of the above
#                describes me well" (3) and anything else -> NA.

# Check: keep a copy of the data as it was before adding the indicators
df_check <- df

df <- df %>%
  mutate(
    female_main = case_when(
      female == 1 ~ 0,
      female == 2 ~ 1,
      female == 3 ~ 0,
      # Double NA keeps all branches the same type (an integer NA mixed with
      # double values is an error under dplyr versions before 1.1.0).
      TRUE ~ NA_real_
    ),
    female_alt = case_when(
      female == 1 ~ 0,
      female == 2 ~ 1,
      TRUE ~ NA_real_
    )
  )

########
# Race #
########
# Check the pre-manipulation race variables (value counts incl. missing)
apply(df[c("race1", "race2", "race3", "race4", "race5", "race6", "race7")], 2,  function(x) table(x, useNA = "always"))


# Collapse the seven race indicator columns into one variable named race,
# where the values are as follows: White (1), Black (2), Hispanic, Latino (3),
# Asian (4), American Indian, Native American, or Alaska Native (5),
# Native Hawaiian or Pacific Islander (6), Other, not listed (7).
# Branches are evaluated top to bottom and the first match wins, so a
# respondent with more than one indicator equal to 1 is assigned the
# highest-numbered category selected. Respondents with no indicator equal to 1
# receive NA.
df <- df %>%
  mutate(
    race = case_when(
      race7 == 1 ~ 7,
      race6 == 1 ~ 6,
      race5 == 1 ~ 5,
      race4 == 1 ~ 4,
      race3 == 1 ~ 3,
      race2 == 1 ~ 2,
      race1 == 1 ~ 1,
      # Double NA keeps all branches the same type (an integer NA mixed with
      # double values is an error under dplyr versions before 1.1.0).
      TRUE ~ NA_real_
    )
  )


##########
# EXPORT #
########## 

# Destination folder for the cleaned data
# NOTE(review): absolute, machine-specific path
folder_path <- "/Users/martinnaunov/Desktop/Desktop - Martin’s MacBook Pro (2)/Persuasion/2_workingdata/"

# Full path of the output file (folder + file name with extension)
file_path <- file.path(folder_path, "receivers_2_working_wide.csv")

# Write the cleaned data frame to CSV without a row-name column
write.csv(x = df, file = file_path, row.names = FALSE)

# Confirm that the file now exists on disk
file.exists(file_path)




